# Fast Gradient Sign Attack (FGSM)

Author: Zeju Li

Figure (fgsm_panda_image.png): the classic panda adversarial example. Reference: Goodfellow et al., "Explaining and Harnessing Adversarial Examples," ICLR, 2015.
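
FGSM builds an adversarial example with a single gradient step: the input is perturbed in the direction of the sign of the gradient of the loss with respect to the input, scaled by a budget $\epsilon$:

$$
x_{\text{adv}} = x + \epsilon \cdot \operatorname{sign}\big(\nabla_{x}\,\mathcal{L}(\theta, x, y)\big)
$$

In this notebook, $x$ is the preprocessed image tensor, $y$ is the caption that BLIP generates on the clean image, and $\mathcal{L}$ is the model's captioning loss.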

## Installing the required Python packages & downloading model weights
# !pip install torch requests Pillow transformers matplotlib
# !git lfs install
# Note: each `!` line runs in its own subshell, so chain `cd` with the clone command:
# !cd model_weights && git clone https://hf-mirror.com/Salesforce/blip-image-captioning-base
import torch
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import matplotlib.pyplot as plt
# Select the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and processor
model_name = "/cpfs01/projects-HDD/cfff-906dc71fafda_HDD/ll_25113060022/undergraduates_course_copy/adversarial_attack/advanced/model_weights/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(model_name)
model = BlipForConditionalGeneration.from_pretrained(model_name).to(device)
model.eval()  # Set to evaluation mode

# Download the example image
url = "https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg"
image = Image.open(requests.get(url, stream=True).raw).convert('RGB')
display(image)  # Show the original image
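The checkpoint above is loaded from a local path. If those weights are not available, the same model can be loaded by its Hub id instead; a minimal sketch (this downloads the weights on first use, or you can point `from_pretrained` at the directory cloned in the install step):

# Alternative: load the checkpoint by its Hub id instead of a local path
# processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
# model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)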

# Preprocess the image into BLIP's normalized tensor format
def preprocess_image(image):
    inputs = processor(images=image, return_tensors="pt")
    return inputs.pixel_values.to(device)

# Enable gradients on the image tensor so the loss can be backpropagated to the pixels
image_tensor = preprocess_image(image).requires_grad_(True)

# Set the text prompt used to condition caption generation
text_prompt = "a photography of"
text_inputs = processor(text=text_prompt, return_tensors="pt").to(device)

input_ids = text_inputs.input_ids
with torch.no_grad():
    original_output = model.generate(
            pixel_values=image_tensor, 
            input_ids=input_ids,
            max_length=50
        )
    original_caption = processor.decode(original_output[0], skip_special_tokens=True)
print(f"the original caption of above figure is: \n{original_caption}")
the original caption of above figure is: 
a photography of a woman and her dog on the beach
# FGSM attack: use the caption generated on the clean image as the labels,
# so the loss measures how well the model reproduces its own caption
loss = model(
            pixel_values=image_tensor,
            input_ids=original_output,
            labels=original_output
        ).loss

model.zero_grad()
loss.backward()

# Get the gradient of the loss with respect to the image
data_grad = image_tensor.grad.data
sign_data_grad = data_grad.sign()

# Apply the FGSM perturbation
epsilon = 0.3
perturbed_image = image_tensor + epsilon * sign_data_grad

# Clamp to a valid range (staying in normalized space)
perturbed_image = torch.clamp(perturbed_image, -3.0, 3.0)  # approximate range after BLIP normalization
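The fixed ±3.0 clamp is only a loose approximation of the valid range in BLIP's normalized space. A tighter, per-channel bound can be derived from the normalization mean and std (the same constants used for visualization further down); the helper below is a minimal sketch, and `clamp_normalized` is our own name, not part of the notebook:

# Optional: a tighter per-channel clamp derived from the normalization constants (sketch)
norm_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).view(1, 3, 1, 1).to(device)
norm_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).view(1, 3, 1, 1).to(device)
norm_lower = (0.0 - norm_mean) / norm_std  # normalized value of a pixel equal to 0
norm_upper = (1.0 - norm_mean) / norm_std  # normalized value of a pixel equal to 1

def clamp_normalized(x):
    # Clamp per channel so the denormalized image stays inside [0, 1]
    return torch.max(torch.min(x, norm_upper), norm_lower)

# e.g. perturbed_image = clamp_normalized(image_tensor + epsilon * sign_data_grad)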

with torch.no_grad():
    # Caption the adversarial example
    adversarial_output = model.generate(
            pixel_values=perturbed_image.detach(),
            input_ids=input_ids,
            max_length=50
        )
    adversarial_caption = processor.decode(adversarial_output[0], skip_special_tokens=True)
print(f"the caption of the adversarial sample is: \n{adversarial_caption}")
the caption of the adversarial sample is: 
a photography of a couple sitting in the sand
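An ε of 0.3 in normalized space is a fairly aggressive budget. To see how the caption degrades as the budget grows, the gradient sign computed above can be reused for a quick sweep; a minimal sketch (the variable names follow the cells above):

# Sketch: sweep over several perturbation budgets and compare the resulting captions
for eps in [0.01, 0.05, 0.1, 0.3]:
    adv = torch.clamp(image_tensor + eps * sign_data_grad, -3.0, 3.0)
    with torch.no_grad():
        out = model.generate(pixel_values=adv.detach(), input_ids=input_ids, max_length=50)
    print(f"epsilon={eps}: {processor.decode(out[0], skip_special_tokens=True)}")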
# Denormalization function (for visualization)
def denormalize_image(tensor):
    # Normalization constants used by BLIP: mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]
    mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).view(1, 3, 1, 1).to(device)
    std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).view(1, 3, 1, 1).to(device)
    return tensor * std + mean

# Visualize the results
def visualize_results(original, perturbed, original_caption, adversarial_caption, title):
    # Denormalize the images and clip to [0, 1] for display
    original_denorm = denormalize_image(original).squeeze(0).permute(1, 2, 0).cpu().detach().numpy().clip(0, 1)
    perturbed_denorm = denormalize_image(perturbed).squeeze(0).permute(1, 2, 0).cpu().detach().numpy().clip(0, 1)

    # Create the figure
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
    fig.suptitle(title, fontsize=16)

    # Original image and caption
    axes[0].imshow(original_denorm)
    axes[0].set_title("Original Image")
    axes[0].axis('off')
    axes[0].text(0.5, -0.15, f"Caption: {original_caption}", 
                ha='center', transform=axes[0].transAxes, fontsize=12)

    # Adversarial example and caption
    axes[1].imshow(perturbed_denorm)
    axes[1].set_title(f"Adversarial Image (ε={epsilon})")
    axes[1].axis('off')
    axes[1].text(0.5, -0.15, f"Caption: {adversarial_caption}", 
                ha='center', transform=axes[1].transAxes, fontsize=12)

    plt.tight_layout()
    plt.show()

# Display the untargeted attack results
visualize_results(
    image_tensor, 
    perturbed_image, 
    original_caption, 
    adversarial_caption,
    "FGSM Attack on BLIP Model"
)
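It is also instructive to look at the perturbation itself; the sketch below rescales the normalized-space difference to [0, 1] for display and reports its L-infinity norm:

# Sketch: visualize the FGSM perturbation (difference between adversarial and clean image)
perturbation = (perturbed_image - image_tensor).squeeze(0).permute(1, 2, 0).cpu().detach().numpy()
plt.imshow((perturbation - perturbation.min()) / (perturbation.max() - perturbation.min() + 1e-8))
plt.title(f"FGSM perturbation (L-inf = {abs(perturbation).max():.3f} in normalized space)")
plt.axis('off')
plt.show()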
